# _*_coding:utf-8_*_
__author__ = 'gerry'

import pandas as pd
import matplotlib.pyplot as plt

# Location of the Advertising dataset.  Raw strings keep the Windows
# backslashes literal: in an ordinary literal '\U' begins a unicode escape
# (a SyntaxError on Python 3) and '\A' is an invalid escape sequence.
path = r'C:\Users\Gerry\Desktop\PythonData'
data_name = r'\Advertising.csv'
# Loaded once at import time; the functions below read this module-level frame.
data = pd.read_csv(path + data_name)

def main():
    """Plot Sales against each advertising channel in three stacked subplots.

    Reads the module-level ``data`` frame and draws one panel per column
    ('TV', 'Radio', 'Newspaper') with 'Sales' on the y-axis, each panel
    titled with its column name, then shows the figure.
    """
    y = data['Sales']

    # (column, matplotlib format string) for each panel, top to bottom.
    panels = [('TV', 'ro'), ('Radio', 'g^'), ('Newspaper', 'b*')]

    plt.figure(figsize=(9, 12))
    for position, (column, style) in enumerate(panels, start=1):
        # A len(panels)-row, single-column grid; `position` is 1-based.
        plt.subplot(len(panels), 1, position)
        plt.plot(data[column], y, style)
        plt.title(column)
        plt.grid()
    plt.tight_layout()

    plt.show()






def Vector():
    """Print a preview of the feature matrix and of the target series.

    Selects the 'TV', 'Radio' and 'Newspaper' columns from the module-level
    ``data`` frame, prints their first rows, the selection's type and shape,
    and then the first rows of the 'Sales' column.
    """
    # Names of the feature columns to pull out of the DataFrame.
    feature_cols = ['TV', 'Radio', 'Newspaper']
    # Indexing with a list of names yields a sub-DataFrame.
    X = data[feature_cols]
    print(X.head())
    # Blank separator line.  A bare `print` only prints on Python 2; on
    # Python 3 it is an expression that does nothing.
    print('')
    print(type(X))
    print(X.shape)

    # Indexing with a single name yields a Series.
    y = data['Sales']
    print(y.head())


def DataSet():
    """Fit a linear regression of Sales on TV and Radio and report test error.

    Uses the module-level ``data`` frame.  The rows are split into train and
    test sets with ``train_test_split(random_state=1)``, a ``LinearRegression``
    is fitted on the training part, and the function prints the intercept,
    the coefficients (also paired with their feature names), some diagnostics
    about the predictions, and the root-mean-squared error on the test part.
    Finally it plots predicted and actual test values on one figure.
    """
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import train_test_split
    from sklearn.linear_model import LinearRegression
    import numpy as np

    # Names of the feature columns used as predictors.
    feature_cols = ['TV', 'Radio']
    X = data[feature_cols]

    # Target series.
    y = data['Sales']
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

    print(linreg.intercept_)
    print(linreg.coef_)
    # Show which coefficient belongs to which feature.
    print(list(zip(feature_cols, linreg.coef_)))

    y_pred = linreg.predict(X_test)

    print(type(y_pred), type(y_test))
    print(len(y_pred), len(y_test))
    print(y_pred.shape, y_test.shape)

    # Root-mean-squared error computed directly from the residuals.
    # `.values` drops the Series index so the subtraction is positional.
    residuals = y_pred - y_test.values
    print("RMSE by hand:", np.sqrt(np.mean(residuals ** 2)))

    # Plot predictions against the held-out values, sample by sample.
    sample_index = range(len(y_pred))
    plt.figure()
    plt.plot(sample_index, y_pred, 'b-', label='predict')
    plt.plot(sample_index, y_test, 'r-', label='test')
    plt.legend(loc='upper right')  # show the line labels
    plt.xlabel("the number of sales")
    plt.ylabel('value of sales')
    plt.show()


# Script entry point: running the file directly executes the regression demo.
if __name__ == "__main__":
    DataSet()
